from datetime import datetime
import requests, re, pymysql
from requests.api import patch
from requests_html import HTMLSession

# Module-level MySQL connection; save_data() commits on it and the script
# entry point creates its cursor from it.
# NOTE(review): host and credentials are hard-coded in source — consider
# loading them from configuration or the environment.
conn = pymysql.connect(
    host="172.16.10.201",
    user="zy001",
    port=3306,
    password="zy@123",
    database="crawl",
    charset='utf8'
)

# HTTP headers passed to every session.get() call in this file; the
# user-agent is a desktop Chrome string.
headers = {
    'accept-encoding': 'gzip, deflate, br',
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
}

# Parameterized INSERT for one scraped comment. The eleven %s placeholders
# must stay in the same order as the row tuples built in save_data().
insert_sql = '''insert into smoke_content(smoke_id,url,nickname,level,comment_time,address,price,unit,content,create_time,update_time)values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''


# Work out how many comment pages a product has and build their URLs.
def creare_url(productid):
    """Return the paginated comment URLs for ``productid``.

    Fetches the product's comment landing page with the module-level
    ``session`` and ``headers``, reads the first integer found in the pager
    text, and builds one URL per page from it.

    Args:
        productid: Product identifier; converted with ``str`` when the URLs
            are assembled.

    Returns:
        A list of comment-page URLs, or ``None`` when the pager node is
        missing (the product has no comments) or its text contains no number.
    """
    landing_url = 'https://www.yanyue.cn/comment/' + str(productid)
    response = session.get(landing_url, headers=headers)
    pager_text = response.html.xpath('//div/nav/div[1]/text()[1]')
    # No pager node means there are no comments.
    if not pager_text:
        return None

    # Raw string avoids the invalid "\d" escape; a pager without any digits
    # is treated like "no comments" instead of raising IndexError.
    count_match = re.search(r'\d+', pager_text[0])
    if count_match is None:
        return None
    total = int(count_match.group())

    # NOTE(review): the "// 10 + 2" bound presumably assumes 10 comments per
    # page; when the number is an exact multiple of 10 it yields one extra
    # page. Kept as-is — confirm against the site's pager before tightening.
    page_prefix = ('https://www.yanyue.cn/product/comment/productid/'
                   + str(productid) + '/p/')
    return [page_prefix + str(page) for page in range(1, total // 10 + 2)]


def save_data(url, smoke_id):
    """Scrape one comment page and insert its comments into ``smoke_content``.

    Fetches ``url`` with the module-level ``session``/``headers``, treats each
    ``<li>`` under ``#commentswrap`` as one comment, hands every comment to
    ``save_item`` as a dict, and batch-inserts all of them through the
    module-level ``cursor``/``conn`` using ``insert_sql``.

    Args:
        url: Comment-page URL to fetch; also stored with every row.
        smoke_id: Id of the ``smoke`` row these comments belong to.

    Returns:
        The string '存储成功' once the batch has been committed.

    Raises:
        Any exception raised while parsing or inserting; a failed insert is
        rolled back before the exception propagates.
    """
    response = session.get(url, headers=headers)
    rows = []
    for item in response.html.xpath('//*[@id="commentswrap"]/li'):
        nickname = item.xpath('//p/span[@class="username"]', first=True).text
        # The second <span> inside the comment's <p> carries the timestamp.
        comment_time = item.xpath('//p/span')[1].text
        address = item.attrs.get('title')
        level = item.xpath('//p/a')[0].text

        # The price span is optional; evaluate the XPath once and fall back
        # to the placeholder price/unit when it is absent.
        price_nodes = item.xpath("//p/span[@class='price_num']")
        if price_nodes:
            price = float(price_nodes[0].text)
            unit = '元/包'
        else:
            price = 0
            unit = ' '

        content = item.xpath("//p[@class='commenttext']")[0].text

        # One timestamp per comment so create_time and update_time agree,
        # both with each other and between the dict and the inserted row.
        now = datetime.now()
        result = {
            'smoke_id': smoke_id,
            'url': url,
            'nickname': nickname,
            'level': level,
            'comment_time': comment_time,
            'address': address,
            'price': price,
            'unit': unit,
            'content': content,
            'create_time': now,
            'update_time': now,
        }
        # Key order above matches the column order of insert_sql, so the row
        # tuple is derived from the dict rather than spelled out twice.
        rows.append(tuple(result.values()))

        # NOTE(review): save_item is not defined or imported in the visible
        # part of this file — confirm where it comes from.
        save_item(result)

    try:
        cursor.executemany(insert_sql, rows)
        conn.commit()
    except Exception:
        # Leave the shared connection in a clean state before propagating.
        conn.rollback()
        raise
    return '存储成功'


if __name__ == '__main__':
    # session and cursor are deliberately module-level names: creare_url()
    # and save_data() read them as globals.
    session = HTMLSession()
    cursor = conn.cursor()
    try:
        cursor.execute("select id, url from smoke")
        for smoke_id, smoke_url in cursor.fetchall():
            # The product id is the first run of digits in the stored URL.
            id_match = re.search(r'\d+', smoke_url or '')
            if id_match is None:
                print('skip: no product id in', smoke_url)
                continue

            page_urls = creare_url(productid=int(id_match.group()))
            if not page_urls:
                continue  # product has no comments

            for page_url in page_urls:
                print(page_url)
                save_data(page_url, smoke_id=smoke_id)
    finally:
        # Release the database and HTTP resources even if the crawl aborts.
        cursor.close()
        conn.close()
        session.close()
